import torch as th
import numpy as np
import pdb

def build_td_lambda_targets(rewards, terminated, mask, target_qs, gamma, td_lambda):
    """Compute TD(lambda) targets with a backwards recursion over time.

    Per the original author's note, ``target_qs`` is assumed to be laid out as
    B*T*A and ``rewards``, ``terminated`` and ``mask`` as (at least) B*T-1*1.

    Args:
        rewards: per-step rewards, indexed as ``rewards[:, t]``.
        terminated: per-step termination flags; summed over dim 1 to decide
            whether the final time slice may bootstrap.
        mask: per-step validity mask; scales the one-step part of the target.
        target_qs: target Q-values; also determines the shape, dtype and
            device of the result (allocated via ``new_zeros``).
        gamma: discount factor.
        td_lambda: lambda mixing coefficient (same value at every step).

    Returns:
        A tensor with the same shape as ``target_qs``. Note that the full
        tensor is returned, including the last time slice, which only holds
        the (termination-gated) bootstrap value ``target_qs[:, -1]``.
    """
    lambda_returns = target_qs.new_zeros(target_qs.shape)

    # The final slice bootstraps from the target network only when no
    # termination flag was raised anywhere along the time axis.
    not_terminated = 1 - th.sum(terminated, dim=1)
    lambda_returns[:, -1] = target_qs[:, -1] * not_terminated

    # Walk backwards from the second-to-last time index down to 0.
    for step in reversed(range(lambda_returns.shape[1] - 1)):
        # Share of the already-computed lambda-return of the next step.
        carried = td_lambda * gamma * lambda_returns[:, step + 1]
        # Share of the one-step bootstrap; dropped on terminal transitions.
        bootstrap = (1 - td_lambda) * gamma * target_qs[:, step + 1] \
            * (1 - terminated[:, step])
        lambda_returns[:, step] = carried + mask[:, step] * (rewards[:, step] + bootstrap)

    return lambda_returns

def build_td_lambda_targets_with_weights(rewards, terminated, mask, target_qs, gamma, td_lambda):
    """Compute TD(lambda) targets where lambda varies per entry.

    Same backwards recursion as the scalar-lambda variant, but ``td_lambda``
    is an array indexed as ``td_lambda[:, t]``. The result is allocated with
    ``np.zeros_like(target_qs)``.

    Per the original author's note, ``target_qs`` is assumed to be laid out as
    B*T*A and ``rewards``, ``terminated`` and ``mask`` as (at least) B*T-1*1.

    Args:
        rewards: per-step rewards, indexed as ``rewards[:, t]``.
        terminated: per-step termination flags.
        mask: per-step validity mask; scales the one-step part of the target.
        target_qs: target Q-values; fixes the shape and dtype of the result.
        gamma: discount factor.
        td_lambda: array of lambda weights exposing ``.shape`` with a time
            axis at index 1. If it covers fewer time steps than
            ``target_qs``, the missing steps are filled with zeros.

    Returns:
        An array shaped like ``target_qs`` (the full time axis is returned,
        including the last slice that only holds the bootstrap value).

    NOTE(review): the final slice is gated by ``terminated[:, -1]`` only,
    whereas ``build_td_lambda_targets`` gates it by the sum of ``terminated``
    over time. Confirm with the caller that this difference is intended.
    """
    returns = np.zeros_like(target_qs)
    horizon = returns.shape[1]
    given_steps = td_lambda.shape[1]

    # Use the supplied weights as-is unless they are shorter than the time
    # axis of the targets; in that case zero-pad them up to the full horizon.
    weights = td_lambda
    if horizon > given_steps:
        weights = np.zeros_like(returns)
        weights[:, :given_steps] = td_lambda

    # Bootstrap value for the last time slice.
    returns[:, -1] = target_qs[:, -1] * (1 - terminated[:, -1])

    # Walk backwards from the second-to-last time index down to 0.
    for step in reversed(range(horizon - 1)):
        lam = weights[:, step]
        # Share of the already-computed lambda-return of the next step.
        carried = lam * gamma * returns[:, step + 1]
        # Share of the one-step bootstrap; dropped on terminal transitions.
        bootstrap = (1 - lam) * gamma * target_qs[:, step + 1] \
            * (1 - terminated[:, step])
        returns[:, step] = carried + mask[:, step] * (rewards[:, step] + bootstrap)

    return returns